/*************************************************************************
 *                                                                       *
 * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
 * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
 *                                                                       *
 * This library is free software; you can redistribute it and/or         *
 * modify it under the terms of EITHER:                                  *
 *   (1) The GNU Lesser General Public License as published by the Free  *
 *       Software Foundation; either version 2.1 of the License, or (at  *
 *       your option) any later version. The text of the GNU Lesser      *
 *       General Public License is included with this library in the     *
 *       file LICENSE.TXT.                                               *
 *   (2) The BSD-style license that is included with this library in     *
 *       the file LICENSE-BSD.TXT.                                       *
 *                                                                       *
 * This library is distributed in the hope that it will be useful,       *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
 * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
 *                                                                       *
 *************************************************************************/

/*

TODO
----

* gettimeofday() and the pentium time stamp counter return the real time,
  not the process time. fix this somehow!

*/

#include <ode/common.h>
#include <ode/timer.h>
#include "config.h"
#include "common.h"


// misc defines
#define ALLOCA dALLOCA16

//****************************************************************************
// implementation for windows based on the multimedia performance counter.

#ifdef WIN32

#include "windows.h"

static inline void getClockCount (unsigned long cc[2])
{
    LARGE_INTEGER a;
    QueryPerformanceCounter (&a);
    cc[0] = a.LowPart;
    cc[1] = a.HighPart;
}


// Intentionally empty in the windows implementation: nothing is done before
// the performance counter is read. The function exists so that the common
// stopwatch / code timer code can call serialize() on every platform.
static inline void serialize()
{
}


static inline double loadClockCount (unsigned long cc[2])
{
    LARGE_INTEGER a;
    a.LowPart = cc[0];
    a.HighPart = cc[1];
    return double(a.QuadPart);
}


// Timer resolution in seconds: the reciprocal of the tick frequency.
double dTimerResolution()
{
    const double ticks_per_second = dTimerTicksPerSecond();
    return 1.0 / ticks_per_second;
}


double dTimerTicksPerSecond()
{
    static int query=0;
    static double hz=0.0;
    if (!query) {
        LARGE_INTEGER a;
        QueryPerformanceFrequency (&a);
        hz = double(a.QuadPart);
        query = 1;
    }
    return hz;
}

#endif

//****************************************************************************
// implementation based on the pentium time stamp counter. the timer functions
// can be serializing or non-serializing. serializing will ensure that all
// instructions have executed and data has been written back before the cpu
// time stamp counter is read. the CPUID instruction is used to serialize.

#if defined(PENTIUM) && !defined(WIN32)

// we need to know the clock rate so that the timing function can report
// accurate times. this number only needs to be set accurately if we're
// doing performance tests and care about real-world time numbers - otherwise,
// just ignore this. i have not worked out how to determine this number
// automatically yet.

#define PENTIUM_HZ (500e6)

// Read the time stamp counter with the rdtsc instruction and store the result
// through `cc`: the contents of eax are written to byte offset 0 and the
// contents of edx to byte offset 4, so the first 8 bytes at `cc` hold the
// count as one 64 bit little-endian integer. The two variants differ only in
// the width of the pointer register used for addressing (esi vs. rsi).
// NOTE(review): where unsigned long is 8 bytes wide, both halves land inside
// cc[0] and cc[1] is never written - confirm that no caller on such a system
// reads cc[1] directly.
static inline void getClockCount (unsigned long cc[2])
{
#ifndef X86_64_SYSTEM	
    // 32 bit addressing: `cc` is passed in esi ("S" constraint)
    asm volatile (
        "rdtsc\n"
        "movl %%eax,(%%esi)\n"
        "movl %%edx,4(%%esi)\n"
        : : "S" (cc) : "%eax","%edx","cc","memory");
#else
    // 64 bit addressing: same stores, but the pointer lives in rsi
    asm volatile (
        "rdtsc\n"
        "movl %%eax,(%%rsi)\n"
        "movl %%edx,4(%%rsi)\n"
        : : "S" (cc) : "%eax","%edx","cc","memory");
#endif  
}


// Execute the cpuid instruction (with the a-register set to 0) so that the
// instruction stream is serialized before the time stamp counter is read -
// see the comment at the top of this section. The b-register is saved and
// restored by hand with push/pop instead of being listed as a clobber; the
// a, c and d registers are declared as clobbered.
// NOTE(review): the push/pop pair moves the stack pointer inside the asm
// statement without the compiler knowing. On x86-64 this may overwrite data
// the compiler keeps below the stack pointer (red zone) - confirm this is
// safe with the compiler flags in use.
static inline void serialize()
{
#ifndef X86_64_SYSTEM
    asm volatile (
        "mov $0,%%eax\n"
        "push %%ebx\n"
        "cpuid\n"
        "pop %%ebx\n"
        : : : "%eax","%ecx","%edx","cc","memory");
#else
    asm volatile (
        "mov $0,%%rax\n"
        "push %%rbx\n"
        "cpuid\n"
        "pop %%rbx\n"
        : : : "%rax","%rcx","%rdx","cc","memory");
#endif
}


// Convert a stored clock count to a double. `fildll` loads the 64 bit integer
// that starts at a[0] onto the x87 stack and `fstpl` pops it into `ret` as a
// double. Although only a[0] is named as the asm operand, 8 bytes starting at
// that address are read, which matches the 8 bytes written by getClockCount().
// Both preprocessor branches currently contain the same instructions.
static inline double loadClockCount (unsigned long a[2])
{
    double ret;
#ifndef X86_64_SYSTEM
    asm volatile ("fildll %1; fstpl %0" : "=m" (ret) : "m" (a[0]) :
    "cc","memory");
#else
    asm volatile ("fildll %1; fstpl %0" : "=m" (ret) : "m" (a[0]) :
    "cc","memory");
#endif  
    return ret;
}


// Timer resolution in seconds: the duration of one clock tick at the
// configured PENTIUM_HZ rate.
double dTimerResolution()
{
    const double seconds_per_tick = 1.0/PENTIUM_HZ;
    return seconds_per_tick;
}


// Number of time stamp counter ticks per second. This is the compile-time
// constant PENTIUM_HZ, not a value measured at run time.
double dTimerTicksPerSecond()
{
    const double ticks_per_second = PENTIUM_HZ;
    return ticks_per_second;
}

#endif

//****************************************************************************
// otherwise, do the implementation based on gettimeofday().

#if !defined(PENTIUM) && !defined(WIN32)

#ifndef macintosh

#include <sys/time.h>
#include <unistd.h>


// Read the current time with gettimeofday() and store it as two words:
// cc[0] receives the microsecond field, cc[1] the second field.
static inline void getClockCount (unsigned long cc[2])
{
    struct timeval now;
    gettimeofday (&now,0);
    cc[1] = now.tv_sec;
    cc[0] = now.tv_usec;
}

#else // macintosh

#include <CoreServices/CoreServices.h>
#include <ode/Timer.h>

// Read the current time with Microseconds() and store it as two words:
// cc[0] receives the microseconds within the current second (0..999999),
// cc[1] the number of whole seconds.
//
// The full 64 bit count (hi and lo words) is used. Using only the low 32 bit
// word would make the reading wrap around every 2^32 microseconds (roughly
// 71.6 minutes), producing a negative interval for any measurement that
// happens to span the wrap.
static inline void getClockCount (unsigned long cc[2])
{
    UnsignedWide ms;
    Microseconds (&ms);
    const unsigned long long usec =
        ((unsigned long long) ms.hi << 32) | (unsigned long long) ms.lo;
    cc[1] = (unsigned long) (usec / 1000000ULL);
    cc[0] = (unsigned long) (usec % 1000000ULL);
}

#endif


// Intentionally empty in the gettimeofday() / Microseconds() implementation:
// nothing is done before the clock is read. The function exists so that the
// common stopwatch / code timer code can call serialize() on every platform.
static inline void serialize()
{
}


// Convert a two-word clock count (a[0] = microseconds, a[1] = seconds, as
// written by getClockCount()) into a single count of microseconds.
static inline double loadClockCount (unsigned long a[2])
{
    const double whole_seconds_in_usec = a[1]*1.0e6;
    return whole_seconds_in_usec + a[0];
}


// Measure the timer resolution, in seconds, by busy-waiting on the clock:
// first spin until the reading changes (so we start right at a clock update),
// then spin again until it changes once more. The difference between the two
// changed readings is the smallest step the clock was observed to make.
double dTimerResolution()
{
    unsigned long later[2],earlier[2];

    // align with a clock update
    getClockCount (later);
    for (;;) {
        getClockCount (earlier);
        if (later[0]!=earlier[0] || later[1]!=earlier[1]) break;
    }

    // wait for the following update
    for (;;) {
        getClockCount (later);
        if (later[0]!=earlier[0] || later[1]!=earlier[1]) break;
    }

    const double t_later = loadClockCount (later);
    const double t_earlier = loadClockCount (earlier);
    const double step = t_later-t_earlier;
    return step / dTimerTicksPerSecond();
}


// Number of clock ticks per second. Clock counts in this implementation are
// expressed in microseconds, so this is always one million.
double dTimerTicksPerSecond()
{
    const double ticks_per_second = 1.0e6;
    return ticks_per_second;
}

#endif

//****************************************************************************
// stop watches

// Clear a stopwatch: zero both words of the stored start clock count and the
// accumulated time.
void dStopwatchReset (dStopwatch *s)
{
    s->cc[1] = 0;
    s->cc[0] = 0;
    s->time = 0;
}


// Start (or restart) a stopwatch by recording the current clock count in
// s->cc. The accumulated time in s->time is left untouched, so repeated
// start/stop pairs add up.
void dStopwatchStart (dStopwatch *s)
{
    // serialize first, then read the clock - the order matters
    serialize();
    getClockCount (s->cc);
}


// Stop a stopwatch: read the clock and add the number of ticks elapsed since
// the matching dStopwatchStart() call to the accumulated time in s->time.
void dStopwatchStop  (dStopwatch *s)
{
    unsigned long now[2];
    serialize();
    getClockCount (now);

    const double started = loadClockCount (s->cc);
    const double stopped = loadClockCount (now);
    const double elapsed = stopped-started;
    s->time += elapsed;
}


// Return the time accumulated by a stopwatch, converted from clock ticks to
// seconds.
double dStopwatchTime (dStopwatch *s)
{
    const double accumulated_ticks = s->time;
    return accumulated_ticks / dTimerTicksPerSecond();
}

//****************************************************************************
// code timers

// maximum number of events to record
#define MAXNUM 100

// number of entries of the event array that are in use for the current run.
// dTimerStart() sets this to 1; dTimerNow() and dTimerEnd() each append one
// entry while there is room.
static int num = 0;

// the recorded events. entry i holds the clock count taken when event i
// began, so the duration of event i is the difference between the clock
// counts of entries i+1 and i. the total_t, total_p and count fields are
// running sums updated by dTimerReport() and used to print averages.
static struct {
    unsigned long cc[2];		// clock counts (format defined by getClockCount())
    double total_t;		// total clocks used in this slot, summed over reports.
    double total_p;		// total percentage points used in this slot, summed over reports.
    int count;			// number of times this slot has been updated.
    const char *description;		// pointer to static string (stored, not copied)
} event[MAXNUM];


// make sure all slot totals and counts reset to 0 at start

// Zero the running totals and update counts of every slot. Only the first
// call does any work; later calls return immediately so that accumulated
// averages survive across timing runs.
static void initSlots()
{
    static int slots_cleared=0;
    if (slots_cleared) return;

    int slot = 0;
    while (slot < MAXNUM) {
        event[slot].total_p = 0;
        event[slot].total_t = 0;
        event[slot].count = 0;
        ++slot;
    }
    slots_cleared = 1;
}


// Begin a new timing run. Any previously recorded events are discarded (the
// entry count goes back to 1) and slot 0 is labelled with `description` and
// stamped with the current clock count. The description pointer is stored,
// not copied, so the string must outlive the report.
void dTimerStart (const char *description)
{
    initSlots();
    num = 1;
    event[0].description = description;

    // take the time stamp last, right after serializing
    serialize();
    getClockCount (event[0].cc);
}


// Record the start of the next event in the current timing run. The event is
// silently dropped when the event array is already full. The description
// pointer is stored, not copied.
void dTimerNow (const char *description)
{
    if (num >= MAXNUM) return;

    // do not serialize
    getClockCount (event[num].cc);
    event[num].description = description;
    num += 1;
}


// Finish the current timing run by appending a final, serialized time stamp
// labelled "TOTAL". Nothing is recorded when the event array is already full.
void dTimerEnd()
{
    if (num >= MAXNUM) return;

    serialize();
    getClockCount (event[num].cc);
    event[num].description = "TOTAL";
    num += 1;
}

//****************************************************************************
// print report

// Print `a` to `f` using the printf-style format `fmt` (which must consume
// exactly one double), scaled to a convenient magnitude and followed by the
// matching unit prefix: nothing, "m" (x1e3), "u" (x1e6) or "n" (x1e9). The
// first scale at which the value reaches 0.999999 is used; values that are
// still smaller after scaling by 1e6 are printed with the "n" prefix.
static void fprintDoubleWithPrefix (FILE *f, double a, const char *fmt)
{
    static const char *const prefixes[] = {"", "m", "u"};
    const int prefix_count = (int) (sizeof(prefixes) / sizeof(prefixes[0]));

    for (int k=0; k<prefix_count; k++) {
        if (a >= 0.999999) {
            fprintf (f,fmt,a);
            fputs (prefixes[k],f);
            return;
        }
        a *= 1000.0;
    }

    // smallest supported scale: print whatever is left as nano
    fprintf (f,fmt,a);
    fputs ("n",f);
}


void dTimerReport (FILE *fout, int average)
{
    int i;
    sizeint maxl;
    double ccunit = 1.0/dTimerTicksPerSecond();
    fprintf (fout,"\nTimer Report (");
    fprintDoubleWithPrefix (fout,ccunit,"%.2f ");
    fprintf (fout,"s resolution)\n------------\n");
    if (num < 1) return;

    // get maximum description length
    maxl = 0;
    for (i=0; i<num; i++) {
        sizeint l = strlen (event[i].description);
        if (l > maxl) maxl = l;
    }

    // calculate total time
    double t1 = loadClockCount (event[0].cc);
    double t2 = loadClockCount (event[num-1].cc);
    double total = t2 - t1;
    if (total <= 0) total = 1;

    // compute time difference for all slots except the last one. update totals
    double *times = (double*) ALLOCA (num * sizeof(double));
    for (i=0; i < (num-1); i++) {
        double t1 = loadClockCount (event[i].cc);
        double t2 = loadClockCount (event[i+1].cc);
        times[i] = t2 - t1;
        event[i].count++;
        event[i].total_t += times[i];
        event[i].total_p += times[i]/total * 100.0;
    }

    // print report (with optional averages)
    for (i=0; i<num; i++) {
        double t,p;
        if (i < (num-1)) {
            t = times[i];
            p = t/total * 100.0;
        }
        else {
            t = total;
            p = 100.0;
        }
        fprintf (fout,"%-*s %7.2fms %6.2f%%",(int)maxl,event[i].description,
            t*ccunit * 1000.0, p);
        if (average && i < (num-1)) {
            fprintf (fout,"  (avg %7.2fms %6.2f%%)",
                (event[i].total_t / event[i].count)*ccunit * 1000.0,
                event[i].total_p / event[i].count);
        }
        fprintf (fout,"\n");
    }
    fprintf (fout,"\n");
}
